# __author: PC  (user name filled in automatically)
# date: 2021/9/17  (timestamp filled in automatically)
import requests
import re
import execjs
import json

# Fetch the 36kr.com article search results and print every result item.
#
# Flow, as implemented below:
#   1. Request the search page without cookies and pull the `arg1` value out
#      of the returned HTML (presumably an anti-crawler challenge page --
#      confirm against the live site).
#   2. Feed `arg1` to `return_result` in the local `36kr.js` to obtain the
#      value used for the `acw_sc__v2` cookie.
#   3. Request the page again with that cookie and read the JSON assigned to
#      `window.initialState` to get the list of search results.

# Search page URL; the last path segment is the percent-encoded (UTF-8)
# Chinese search keyword ("acquisition").
url = 'https://36kr.com/search/articles/%E6%94%B6%E8%B4%AD'
# NOTE(review): 'accept-encoding' advertises 'br'; decoding a Brotli body
# needs Brotli support installed alongside requests -- confirm in the
# target environment.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
}
# Seconds to wait for the server before giving up instead of hanging forever.
request_timeout = 15

# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, which would append the dict to the query string
# and leave the request with the library's default headers.
response = requests.get(url, headers=headers, timeout=request_timeout)
print(response.text)

# Extract the upper-case alphanumeric token assigned to `arg1` in the page.
arg1_match = re.search(r"arg1='([0-9A-Z]+)'", response.text)
if arg1_match is None:
    raise RuntimeError("arg1 value not found in the first response")
arg1 = arg1_match.group(1)
print(arg1)

# Load the helper script with a context manager so the file handle is closed.
with open(r'36kr.js', 'r', encoding='utf-8') as js_file:
    js_content = execjs.compile(js_file.read())
cookie = js_content.call('return_result', arg1)
print(cookie)

cookies = {'acw_sc__v2': cookie}
response = requests.get(url, headers=headers, cookies=cookies, timeout=request_timeout)

# The page state is embedded as JSON in an inline <script>; the dot in
# `window.initialState` is escaped so it only matches a literal '.'.
state_match = re.search(r'<script>window\.initialState=(.*)</script>', response.text)
if state_match is None:
    raise RuntimeError("window.initialState not found in the second response")
content = state_match.group(1)
json_content = json.loads(content)
items = json_content['searchResultData']['data']['searchResult']['data']['itemList']
for item in items:
    print(item)